import scrapy
import re
import json
from scrapy.selector import Selector
from myScrapy.items import GamerskyCommentsItem

class GamerskySpider(scrapy.Spider):
    """Scrape user comments from the Gamersky comment API.

    The start URL requests the ``getcomment`` endpoint with a ``callback``
    parameter, so the response body is a JSON object wrapped in a function
    call.  ``parse`` unwraps that object, reads its ``body`` entry (itself a
    JSON-encoded string), takes the HTML fragment stored under ``Comment``
    and yields one ``GamerskyCommentsItem`` per comment block found in it.
    """

    name = "gamersky"
    allowed_domains = ["gamersky.com"]
    start_urls = [
        'https://cm1.gamersky.com/api/getcomment?callback=jQuery18309495630636400962_1597721047941&jsondata={"dateType":"1","loadType":"1","pageIndex":2,"pageSize":"10","foorPageSize":5,"articleId":"972293"}&_=1597721098761',
    ]

    custom_settings = dict(
        DOWNLOAD_DELAY=5,
        CONCURRENT_REQUESTS_PER_DOMAIN=8,
        CONCURRENT_REQUESTS_PER_IP=8,
        ROBOTSTXT_OBEY=False,
        DOWNLOADER_MIDDLEWARES={
            'myScrapy.middlewares.MyUseragentMiddleware': 1000,
        },
        ITEM_PIPELINES={
            'myScrapy.pipelines.GamerskyCommentsPipeline': 300,
        },
    )

    # Matches from the first "{" to the last "}" of the response, i.e. the
    # JSON object inside the callback wrapper.  DOTALL lets the match span
    # line breaks should the server ever pretty-print the payload.
    _JSONP_PAYLOAD_RE = re.compile(r'{.*}', re.S)

    # Item field -> XPath, evaluated relative to one "remark-list-floor" div.
    # Only the first match of each expression is stored on the item.
    _FIELD_XPATHS = {
        'comment': 'div[@class="remark-cont-wrap"]//div[@class="content"]/text()',
        'comment_time': 'div[@class="remark-cont-wrap"]//div[@class="user-time"]/a/text()',
        'user_name': 'div[@class="remark-cont-wrap"]//div[@class="user-name"]/a[@uid]/text()',
        'user_url': 'div[@class="remark-cont-wrap"]//div[@class="user-name"]/a[@uid]/@href',
        'play_info': 'div[@class="remark-cont-wrap"]//div[@class="txt"]/text()',
        'star': 'div[@class="remark-cont-wrap"]//div[@class="xin"]/div/@class',
    }

    def parse(self, response):
        """Yield one ``GamerskyCommentsItem`` per comment in ``response``.

        A response that contains no JSON object, or whose JSON does not have
        the expected ``body`` / ``Comment`` structure, is logged and yields
        nothing instead of raising.  A field whose node is absent from a
        comment block is stored as ``None`` so that a single incomplete
        comment does not abort the remaining comments of the page.

        Args:
            response: The downloaded response for one API page.

        Yields:
            GamerskyCommentsItem: populated with ``comment``,
            ``comment_time``, ``user_name``, ``user_url``, ``play_info``
            and ``star``.
        """
        self.logger.debug("Response headers: %s", response.headers)

        payload = self._JSONP_PAYLOAD_RE.search(response.text)
        if payload is None:
            self.logger.warning("No JSON payload found in %s", response.url)
            return

        try:
            envelope = json.loads(payload.group())
            # "body" holds a second, string-encoded JSON document.
            comments_html = json.loads(envelope['body'])['Comment']
        except (ValueError, KeyError, TypeError) as exc:
            self.logger.error(
                "Unexpected comment payload from %s: %r", response.url, exc
            )
            return

        if not comments_html:
            self.logger.debug("Empty comment markup in %s", response.url)
            return

        floors = Selector(text=comments_html).xpath(
            '//div[@class="remark-list-floor"]'
        )
        for floor in floors:
            item = GamerskyCommentsItem()
            for field, xpath in self._FIELD_XPATHS.items():
                # extract_first() returns None when nothing matches, where
                # indexing extract()[0] would raise IndexError.
                item[field] = floor.xpath(xpath).extract_first()
            yield item